library(tidyverse)
library(readxl)
library(ggrepel)
library(ggprism)
library(ggpubr)
library(survival)
library(survminer)
library(plotly)
# Load the aggregated data for meta analysis
meta <- read_excel(path = "data/FUS_ALS.xlsx", sheet = 2L)
# FUS domain table
FUS_domains <- tibble(
domain = c("QGSY-rich", "RGG1", "RRM", "RGG2", "ZnF", "RGG3", "NLS"),
start = c(1, 166, 286, 371, 424, 453, 506),
end = c(165, 267, 367, 422, 450, 501, 526),
domain_color = c("QGSY-rich" = "#3399CC", "RGG1" = "#666699", "RRM" = "#CC6666", "RGG2" = "#666699", "ZnF" = "#FFCC66", "RGG3" = "#666699", "NLS" = "#999933")
)
# Plot mutations over FUS domain architecture
plt1 <- meta %>%
filter(., Mutation_Type %in% c("missense", "nonsense", "frameshift")) %>%
filter(., Gene == "FUS", str_detect(Disease, "ALS")) %>%
mutate(., cdf = stats::ecdf(Mutation_position)(Mutation_position)) %>%
add_row(., Mutation_position = 0, cdf = 0) %>%
add_row(., Mutation_position = 526, cdf = 1)
plt1 <- ggplot(data = plt1, mapping = aes(x = Mutation_position)) +
## Domain annotation
geom_line(data = tibble(x = c(1, 526), y = c(-0.1, -0.1)), mapping = aes(x = x, y = y), inherit.aes = FALSE) +
geom_rect(
data = FUS_domains,
mapping = aes(xmin = start, xmax = end, ymin = -0.08, ymax = -0.12, fill = domain),
color = "black",
inherit.aes = FALSE,
show.legend = FALSE
) +
geom_text(data = FUS_domains, mapping = aes(x = 0.5*(start + end), y = -0.1, label = domain), size = 2, color = "white") +
scale_fill_manual(values = FUS_domains$domain_color) +
## Annotate distribution
geom_hline(yintercept = 0.15, linetype = "dashed", color = "grey") +
annotate(geom = "text", x = 10, y = 0.22, label = "85%", color = "grey") +
annotate(geom = "text", x = 10, y = 0.08, label = "15%", color = "grey") +
## Cumulative Fraction of ALS cases
geom_step(data = plt1, mapping = aes(y = cdf), color = "black") +
geom_rug(
data = drop_na(plt1, Mutation_Type),
mapping = aes(
x = Mutation_position,
color = forcats::fct_relevel(
.f = Mutation_Type,
c("missense", "nonsense", "frameshift")
)
),
alpha = 0.2,
inherit.aes = FALSE
) +
## Labels, Ticks, and themes
scale_y_continuous(limits = c(-0.15, 1)) +
scale_x_continuous(limits = c(0, 526), breaks = c(seq(0, 500, 100), 526), labels = c(seq(0, 400, 100), "", 526)) +
labs(x = "Position of mutation in FUS", y = "Cumulative fraction of ALS cases", fill = "Mutation") +
theme_prism()
plt1

ggplotly(hide_legend(plt1))
# Age of onset
plt2 <- meta %>%
filter(., !is.na(Mutation_protein)) %>%
filter(., Gene == "FUS", str_detect(Disease, "ALS")) %>%
ggplot(., aes(x = Age_of_onset, color = fct_lump_min(f = Mutation_protein, min = 10))) +
geom_hline(yintercept = 0.5, linetype = "dashed", color = "grey") +
geom_hline(yintercept = 1, linetype = "dashed", color = "grey") +
stat_ecdf(geom = "step") +
geom_rug(alpha = 0.5) +
labs(x = "Age at onset in years", y = "Cumulative fraction", color = "FUS mutation") +
scale_x_continuous(limits = c(0,80), breaks = seq(from = 0, to = 80, by = 10)) +
theme_prism()
# Duraton until endpoint (respiratory support or death)
## ... in months
plt3 <- meta %>%
filter(., !is.na(Mutation_protein)) %>%
filter(., Gene == "FUS", str_detect(Disease, "ALS")) %>%
ggplot(data = ., mapping = aes(x = Duration_in_months, color = fct_lump_min(f = Mutation_protein, min = 10))) +
geom_hline(yintercept = 0.5, linetype = "dashed", color = "grey") +
geom_hline(yintercept = 1, linetype = "dashed", color = "grey") +
geom_step(aes(y = 1 - ..y..), stat = "ecdf") +
geom_rug(alpha = 0.5) +
labs(x = "Duration until endpoint in months", y = "Cumulative fraction", color = "FUS mutation") +
scale_x_continuous(breaks = seq(from = 0, to = 216, by = 24)) +
theme_prism()
## ... in years
plt3b <- meta %>%
filter(., !is.na(Mutation_protein)) %>%
filter(., Gene == "FUS", str_detect(Disease, "ALS")) %>%
ggplot(data = ., mapping = aes(x = Duration_in_months, color = fct_lump_min(f = Mutation_protein, min = 10))) +
geom_hline(yintercept = 0.5, linetype = "dashed", color = "grey") +
geom_hline(yintercept = 1, linetype = "dashed", color = "grey") +
geom_step(aes(y = 1 - ..y..), stat = "ecdf") +
geom_rug(alpha = 0.5) +
labs(x = "Duration until endpoint in years", y = "Cumulative fraction", color = "FUS mutation") +
scale_x_continuous(breaks = seq(from = 0, to = 216, by = 24), labels = seq(from = 0, to = 18, by = 2)) +
theme_prism()
## Plots
ggarrange(plt2, plt3b, common.legend = TRUE)

# Sex ratio
## With inspiration from https://www.robertlanfear.com/blog/files/visualising_gender_balance_R.html
plt4 <- meta %>%
filter(., !is.na(Mutation_protein)) %>%
filter(., Gene == "FUS", str_detect(Disease, "ALS")) %>%
mutate(., Mutation_protein = fct_lump_min(Mutation_protein, min = 10)) %>%
ggplot(data = ., mapping = aes(x = Mutation_protein, fill = Sex)) +
geom_bar(data = . %>% filter(., Sex == "M"), aes(color = Sex), fill = NA, linetype = "dashed", show.legend = FALSE) +
geom_bar(data = . %>% filter(., Sex == "F"), color = "black") +
geom_bar(data = . %>% filter(., Sex == "F"), aes(y =..count..*(-1), color = Sex), fill = NA, linetype = "dashed", show.legend = FALSE) +
geom_bar(data = . %>% filter(., Sex == "M"), color = "black", aes(y =..count..*(-1))) +
labs(x = "Mutations", y = "Cases", color = "FUS mutation") +
scale_y_continuous(limits = c(-120, 120), breaks = seq(-120, 120, 30), labels = abs(seq(-120, 120, 30))) +
scale_x_discrete(limits = rev) +
scale_fill_manual(values = c("#D55E00", "#0072B2")) +
scale_color_manual(values = c("#D55E00", "#0072B2")) +
guides(color = FALSE) +
coord_flip() +
theme_prism()
plt4

# Pie charts
## Theme, http://www.sthda.com/english/wiki/ggplot2-pie-chart-quick-start-guide-r-software-and-data-visualization
## https://ggplot2.tidyverse.org/reference/position_stack.html
blank_theme <- theme_minimal()+
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.border = element_blank(),
panel.grid=element_blank(),
axis.ticks = element_blank(),
plot.title=element_text(size=14, face="bold")
)
# Inheritance pies
plt5 <- meta %>%
filter(., !is.na(Mutation_protein), !(is.na(Inheritance))) %>%
filter(., Gene == "FUS", str_detect(Disease, "ALS")) %>%
mutate(., Mutation_protein = fct_lump_min(Mutation_protein, min = 10)) %>%
group_by(., Mutation_protein, Inheritance) %>%
summarise(., cases = n()) %>%
ungroup(.) %>%
ggplot(data = ., mapping = aes(x = "", y = cases, fill = Inheritance)) +
facet_wrap(~ Mutation_protein, nrow = 1) +
geom_col(position = "fill", color = "black") +
geom_text(aes(label = cases), color = "white", size = 3, position = position_fill(vjust = .5)) +
coord_polar("y", start=0) +
scale_fill_manual(values = c("#E69F00", "#56B4E9")) +
blank_theme +
theme(axis.text.x=element_blank())
plt5

# Onset site pies
plt6 <- meta %>%
mutate(
.,
Onset_site = case_when(
str_detect(Onset, "Limb") ~ "spinal",
str_detect(Onset, "Neck") ~ "spinal",
str_detect(Onset, "Bulbar") ~ "bulbar",
TRUE ~ Onset
)
) %>%
filter(., !is.na(Mutation_protein), Onset_site %in% c("spinal", "bulbar")) %>%
filter(., Gene == "FUS", str_detect(Disease, "ALS")) %>%
mutate(., Mutation_protein = fct_lump_min(Mutation_protein, min = 10)) %>%
group_by(., Mutation_protein, Onset_site) %>%
summarise(., cases = n()) %>%
ungroup(.) %>%
ggplot(data = ., mapping = aes(x = "", y = cases, fill = Onset_site)) +
facet_wrap(~ Mutation_protein, nrow = 1) +
geom_col(position = "fill", color = "black") +
geom_text(aes(label = cases), color = "white", size = 3, position = position_fill(vjust = .5)) +
coord_polar("y", start=0) +
scale_fill_manual(values = c("#882255", "#44AA99")) +
blank_theme +
theme(axis.text.x=element_blank())
plt6

# Figure layout
lyt <- ggarrange(
plt1,
ggarrange(
ggarrange(
plt2,
ggarrange(
plt5,
plt6,
nrow = 2, ncol = 1,
labels = c("d", "e")
),
nrow = 1, ncol = 2,
labels = "b"
),
ggarrange(
plt3,
plt4,
nrow = 1, ncol = 2,
labels = c("c", "f")
),
nrow = 2, ncol = 1
),
labels = c("a"),
nrow = 2, ncol = 1
)
# Summary statistics
meta %>%
filter(., !is.na(Mutation_protein)) %>%
filter(., Gene == "FUS", str_detect(Disease, "ALS")) %>%
mutate(., Mutation_protein = fct_lump_min(Mutation_protein, min = 10)) %>%
group_by(., Mutation_protein) %>%
summarize(
.,
Cases = n(),
Age_of_onset_mean = mean(Age_of_onset, na.rm = TRUE),
Age_of_onset_sd = sd(Age_of_onset, na.rm = TRUE),
Age_of_onset_med = median(Age_of_onset, na.rm = TRUE),
Duration_in_months_mean = mean(Duration_in_months, na.rm = TRUE),
Duration_in_months_sd = sd(Duration_in_months, na.rm = TRUE),
Duration_in_months_med = median(Duration_in_months, na.rm = TRUE)
)
## # A tibble: 8 × 8
## Mutation_protein Cases Age_of_onset_mean Age_of_onset_sd Age_of_onset_med
## <fct> <int> <dbl> <dbl> <dbl>
## 1 p.P525L 42 21.1 7.86 19
## 2 p.R495X 34 31.1 11.3 31
## 3 p.R514S 13 46.8 12.9 48
## 4 p.R521C 104 40.9 11.5 39
## 5 p.R521G 11 44.1 14.2 46
## 6 p.R521H 63 47.9 11.2 47
## 7 p.R521L 36 39.2 10.3 37
## 8 Other 199 41.3 17.9 40
## # ℹ 3 more variables: Duration_in_months_mean <dbl>,
## # Duration_in_months_sd <dbl>, Duration_in_months_med <dbl>
Survival analysis
## Onset
fusals <- meta %>%
filter(., Mutation_Type %in% c("missense", "nonsense", "frameshift")) %>%
filter(., Gene == "FUS", str_detect(Disease, "ALS")) %>%
mutate(., Mutation_protein = fct_lump_min(Mutation_protein, min = 10)) %>%
mutate(
status = case_when(
is.na(Asymptomatic_carrier_age_in_years) ~ 2,
TRUE ~ 1
),
time = case_when(
status == 1 ~ Asymptomatic_carrier_age_in_years,
status == 2 ~ Age_of_onset,
TRUE ~ 0
)
) %>%
filter(., !is.na(time))
fit <- survfit(Surv(time, status) ~ Mutation_protein, data = fusals)
#summary(fit)
ggsurvplot(
fit = fit,
pval = TRUE,
conf.int = TRUE,
linetype = "strata", # Change line type by groups
surv.median.line = "hv", # Specify median survival
fun = "event"
) +
labs(x = "Age of onset in years")

## Log-Rank test comparing survival curves: survdiff()
surv_diff <- survdiff(Surv(time, status) ~ Mutation_protein, data = fusals)
surv_diff
## Call:
## survdiff(formula = Surv(time, status) ~ Mutation_protein, data = fusals)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## Mutation_protein=p.P525L 41 41 7.52 149.1149 160.3604
## Mutation_protein=p.R495X 31 29 18.13 6.5242 7.1342
## Mutation_protein=p.R514S 13 13 16.37 0.6956 0.7628
## Mutation_protein=p.R521C 86 84 80.58 0.1449 0.1895
## Mutation_protein=p.R521G 9 9 10.02 0.1047 0.1124
## Mutation_protein=p.R521H 52 52 68.06 3.7891 4.7782
## Mutation_protein=p.R521L 34 32 33.69 0.0843 0.0972
## Mutation_protein=Other 166 160 185.63 3.5389 6.8609
##
## Chisq= 175 on 7 degrees of freedom, p= <2e-16
## Duration to endpoint
fusals <- meta %>%
filter(., Mutation_Type %in% c("missense", "nonsense", "frameshift")) %>%
filter(., Gene == "FUS", str_detect(Disease, "ALS")) %>%
mutate(., Mutation_protein = fct_lump_min(Mutation_protein, min = 10)) %>%
mutate(
status = case_when(
is.na(Alive_in_months) ~ 2,
TRUE ~ 1
),
time = case_when(
status == 1 ~ Alive_in_months,
status == 2 ~ Duration_in_months,
TRUE ~ 0
)
) %>%
filter(., !is.na(time))
fit <- survfit(Surv(time, status) ~ Mutation_protein, data = fusals)
#summary(fit)
ggsurvplot(
fit = fit,
pval = TRUE,
conf.int = TRUE,
linetype = "strata", # Change line type by groups
surv.median.line = "hv" # Specify median survival
) +
labs(x = "Duration until endpoint in moths")

## Log-Rank test comparing survival curves: survdiff()
surv_diff <- survdiff(Surv(time, status) ~ Mutation_protein, data = fusals)
surv_diff
## Call:
## survdiff(formula = Surv(time, status) ~ Mutation_protein, data = fusals)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## Mutation_protein=p.P525L 39 35 16.19 21.849 25.174
## Mutation_protein=p.R495X 28 27 13.87 12.426 14.089
## Mutation_protein=p.R514S 12 12 6.48 4.705 5.173
## Mutation_protein=p.R521C 76 69 62.12 0.761 1.018
## Mutation_protein=p.R521G 8 6 4.76 0.321 0.352
## Mutation_protein=p.R521H 48 44 57.96 3.363 4.527
## Mutation_protein=p.R521L 31 30 20.58 4.308 5.003
## Mutation_protein=Other 126 103 144.03 11.687 24.649
##
## Chisq= 69.4 on 7 degrees of freedom, p= 2e-12